knitr::opts_chunk$set(echo = FALSE) library(devtools) library(ggplot2) library(plotly) library(knitr) library(tidyverse) load_all()
Figure otu numerical problems in calculating the D-optimal design for a td2pLL model.
The model:
my_theta = c(h=2, delta=0.2, gamma=1.3, c0=0.2) plot_td2pLL(td2pLL_coefs = my_theta, dose_lim = c(0, 1), time_lim = c(1, 10), xaxis_scale = "linear")
The model formula is
[f(d, t) = 100 - 100 \frac{d^h}{ED_{50}(t)^h + d^h}] with [ED_{50}(t) = \Delta \cdot t^{-\gamma} + C_0]
We now calculate (optimal) designs with various number of support points and settings for the particle swarm optimization algorithm.
For the intercept however, we assume that is is not fixed, i.e. $E_0$ is not 100. This is required for the optimal design calculation as otherwise, no design points at dose=0 will be contained in the optimal design.
We vary the number of support points between
and vary the number iterations between
At each combination, we use, for the beginning
only for a first impression on instability.
# set.seed(1905) # # all_k <- 4:10 # all_rep <- 1:3 # all_num_it <- c(100, 200, 300) # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # rep = all_rep) # # # comb <- all_combs[9,] %>% unlist # sim_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta, # control = list(numIt = comb["num_it"], # numPart = 300, setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) # # return(opt_des_df) # })
# sim_res <- do.call(rbind.data.frame, sim_res) # # sim_res <- sim_res %>% group_by(k, num_it, rep) %>% # mutate(sim_id = group_indices()) %>% # arrange(k, num_it, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_res <- sim_res %>% mutate_at(1:4, as.character) %>% # mutate(k = factor(k, levels = all_k)) # # save(sim_res, file = "optDes_sim_res_01.RData") load("optDes_sim_res_01.RData")
Optimal designs for different $k$ and number of iterations For each combination there are three replicates. All three replicates are in one plot (distinguished with colors).
One can see that the algirithm has highly varying results.
sim_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( k~ num_it)
Unfortunately, there is no clear pattern visible \textit{on first glance}.
In the following, we look at the 10 results with the largest value for the D-optimality criterion (on $\log_{10}$ scale):
n_top <- 10 n_top_des <- sim_res %>% arrange(by = desc(log10_value)) %>% group_by(log10_value, sim_id) %>% mutate(group_id = cur_group_id()) %>% ungroup() %>% filter(group_id >= max(group_id) - n_top + 1) n_top_des %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( k~ num_it)
The very best design seems to be this one $k=7$ and 300 iterations:
top_des <- n_top_des %>% filter(log10_value == max(log10_value)) p <- ggplot(top_des, aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( k~ num_it, labeller = "label_both") + labs(title = paste0("Log10 D-crit value: ", round(top_des$log10_value[1], 4))) ggplotly(p)
The equivalence plot for this design:
des_to_list <- function(des_df){ des_list <- list( value = des_df$log10_value[1], weights = des_df$weights, supPoints = as.data.frame(des_df[, c("time", "dose")]) ) return(des_list) }
par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
\textbf{Seven points actualy seems to be correct!} Six "peaks" in the surface and one point alongside the time=0 edge!!!
Also, the optimal weights seem to be: 1/5, 2/15, 2/15, ..., 2/15
We add a second analysis where we fix $k=7$ as number of support points and instead vary the - number of iterations in {200, 300} and the - number of particles in {50, 100, 300}
and allow us to have - 5 repicates
# set.seed(1905) # # all_k <- 7 # all_rep <- 1:5 # all_num_it <- c(200, 300) # all_num_part <- c(50, 100, 300) # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # num_part = all_num_part, # rep = all_rep) # # # comb <- all_combs[1,] %>% unlist # sim_02_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta, # control = list(numIt = comb["num_it"], # numPart = comb["num_part"], # setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # num_part = comb["num_part"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) %>% # as.data.frame() # # return(opt_des_df) # })
# sim_02_res <- do.call(rbind.data.frame, sim_02_res) # # sim_02_res <- sim_02_res %>% group_by(num_it, num_part, rep) %>% # mutate(sim_id = group_indices()) %>% # arrange(num_it, num_part, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_02_res <- sim_02_res %>% mutate_at(1:5, as.character) %>% # mutate(num_part = factor(num_part, levels = all_num_part)) # # save(sim_02_res, file = "optDes_sim_res_02.RData") load("optDes_sim_res_02.RData")
p <- sim_02_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, shape = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 0.5)) + facet_grid( num_it~num_part, labeller = "label_both") ggplotly(p)
Again: Very unclear pattern.
Design with best D-criterion:
top_des <- sim_02_res %>% filter(log10_value == max(log10_value)) p <- ggplot(top_des, aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( num_it~num_part, labeller = "label_both") + labs(title = paste0("Log10 D-crit value: ", round(top_des$log10_value[1], 4))) ggplotly(p)
Does not seem to be optimal, as it does not have the most likely optimal structure described above.
Corresponding equivalence plot: Clearly not optimal!
par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
We redo the simulation study 2 but with fixed weights that we assume to be right: 1/5, 2/15, 2/15, ..., 2/15.
# set.seed(1905) # # all_k <- 7 # all_rep <- 1:5 # all_num_it <- c(200, 300) # all_num_part <- c(50, 100, 300) # fixed_weights <- c(1/5, rep(2/15, 6)) # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # num_part = all_num_part, # rep = all_rep) # # # comb <- all_combs[1,] %>% unlist # sim_03_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta, # wFixed = fixed_weights, # control = list(numIt = comb["num_it"], # numPart = comb["num_part"], # setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # num_part = comb["num_part"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) %>% # as.data.frame() # # return(opt_des_df) # })
# sim_03_res <- do.call(rbind.data.frame, sim_03_res) # # sim_03_res <- sim_03_res %>% group_by(num_it, num_part, rep) %>% # mutate(sim_id = cur_group_id()) %>% # arrange(num_it, num_part, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_03_res <- sim_03_res %>% mutate_at(1:5, as.character) %>% # mutate(num_part = factor(num_part, levels = all_num_part)) # # save(sim_03_res, file = "optDes_sim_res_03.RData") load("optDes_sim_res_03.RData")
p <- sim_03_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, shape = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 0.5)) + facet_grid( num_it~num_part, labeller = "label_both") ggplotly(p)
Might be better! Let's look at the best (by D-criterion) result and its equivalence plot:
top_des <- sim_03_res %>% filter(log10_value == max(log10_value)) p <- ggplot(top_des, aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( num_it~num_part, labeller = "label_both") + labs(title = paste0("Log10 D-crit value: ", round(top_des$log10_value[1], 4))) ggplotly(p)
Same value (9.6074) for D-criterion as best design of SImulation 1! Same pattern, but as presumed, the time value for dose=0 is arbitrary, it just has to obtain the bigger weight: 0.20.
n_top <- 10 n_top_des <- sim_03_res %>% arrange(by = desc(log10_value)) %>% group_by(log10_value, sim_id) %>% mutate(group_id = cur_group_id()) %>% ungroup() %>% filter(group_id >= max(group_id) - n_top + 1) p <- n_top_des %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, label = log10_value)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( num_it~num_part, labeller = "label_both") ggplotly(p)
300 particles and 300 iterations seems to be the best option.
Equivalence plot of the best design of simulation 3:
par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
Additionally try with a model where I guess it is easier to estimate the parameters (because of a stronger influence of time).
my_theta_2 = c(h=2, delta=0.7, gamma=0.5, c0=0.1) plot_td2pLL(td2pLL_coefs = my_theta_2, dose_lim = c(0, 1), time_lim = c(1, 10), xaxis_scale = "linear")
# set.seed(1905) # # all_k <- 7 # all_rep <- 1:5 # all_num_it <- c(200, 300) # all_num_part <- c(50, 100, 300) # fixed_weights <- c(1/5, rep(2/15, 6)) # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # num_part = all_num_part, # rep = all_rep) # # # comb <- all_combs[1,] %>% unlist # sim_04_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta_2, # wFixed = fixed_weights, # control = list(numIt = comb["num_it"], # numPart = comb["num_part"], # setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # num_part = comb["num_part"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) %>% # as.data.frame() # # return(opt_des_df) # })
# sim_04_res <- do.call(rbind.data.frame, sim_04_res) # # sim_04_res <- sim_04_res %>% group_by(num_it, num_part, rep) %>% # mutate(sim_id = cur_group_id()) %>% # arrange(num_it, num_part, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_04_res <- sim_04_res %>% mutate_at(1:5, as.character) %>% # mutate(num_part = factor(num_part, levels = all_num_part)) # # save(sim_04_res, file = "optDes_sim_res_04.RData") load("optDes_sim_res_04.RData")
p <- sim_04_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, shape = rep, label = log10_value)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 0.5)) + facet_grid( num_it~num_part, labeller = "label_both") ggplotly(p)
Also with the presumably easier model, the results are highly varying. We look at the 10 best results based on the D-optimality criterion:
n_top <- 10 n_top_des <- sim_04_res %>% arrange(by = desc(log10_value)) %>% group_by(log10_value, sim_id) %>% mutate(group_id = cur_group_id()) %>% ungroup() %>% filter(group_id >= max(group_id) - n_top + 1) p <- n_top_des %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, label = log10_value)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( num_it~num_part, labeller = "label_both") ggplotly(p)
The design fulfils the structure.
top_des <- sim_04_res %>% filter(log10_value == max(log10_value)) p <- ggplot(top_des, aes(x = dose, y = time, size = weights, color = rep)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 1)) + facet_grid( num_it~num_part, labeller = "label_both") + labs(title = paste0("Log10 D-crit value: ", round(top_des$log10_value[1], 4))) ggplotly(p)
And it is optimal:
par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
The plot is not fully below zero. The peak is at the in-between time value. It might be, because they are not even levelled (on the time-axis).
Also try: Very large number of iterations and particles:
# set.seed(1905) # # all_k <- 7 # all_rep <- 1:10 # all_num_it <- 1000 # all_num_part <- 1000 # fixed_weights <- c(1/5, rep(2/15, 6)) # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # num_part = all_num_part, # rep = all_rep) # # # comb <- all_combs[1,] %>% unlist # sim_05_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta_2, # wFixed = fixed_weights, # control = list(numIt = comb["num_it"], # numPart = comb["num_part"], # setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # num_part = comb["num_part"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) %>% # as.data.frame() # # return(opt_des_df) # })
# sim_05_res <- do.call(rbind.data.frame, sim_05_res) # # sim_05_res <- sim_05_res %>% group_by(num_it, num_part, rep) %>% # mutate(sim_id = cur_group_id()) %>% # arrange(num_it, num_part, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_05_res <- sim_05_res %>% mutate_at(1:5, as.character) %>% # mutate(num_part = factor(num_part, levels = all_num_part)) # # save(sim_05_res, file = "optDes_sim_res_05.RData") load("optDes_sim_res_05.RData")
For 4 out of 10 replications, the optial design was achieved now (Replication 1, 5, 6, 7).
Also: Seems like time value is not the same for the two dose values in between!
p <- sim_05_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, label = log10_value)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 0.5)) ggplotly(p)
top_des <- sim_05_res %>% filter(log10_value == max(log10_value)) par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
Weird that the equivalence plot suggests non-optimality.
Try again with non-fixed weights and iterations=particles=1000:
# # set.seed(1905) # # all_k <- 7 # all_rep <- 1:10 # all_num_it <- 1000 # all_num_part <- 1000 # # # all_combs <- expand.grid(k = all_k, # num_it = all_num_it, # num_part = all_num_part, # rep = all_rep) # # # comb <- all_combs[1,] %>% unlist # sim_06_res <- apply(all_combs, 1, function(comb){ # opt_des <- opt_des_td2pLL(nPoints = comb["k"], # theta = my_theta_2, # wFixed = NULL, # control = list(numIt = comb["num_it"], # numPart = comb["num_part"], # setProgressBar = FALSE)) # # opt_des_df <- cbind( # k = comb["k"], # num_it = comb["num_it"], # num_part = comb["num_part"], # rep = comb["rep"], # time = opt_des$supPoints[, 1], # dose = opt_des$supPoints[, 2], # weights = opt_des$weights, # log10_value = log10(max(opt_des$value, 1))) %>% # as.data.frame() # # return(opt_des_df) # }) # # ``` # # # ```r # sim_06_res <- do.call(rbind.data.frame, sim_06_res) # # sim_06_res <- sim_06_res %>% group_by(num_it, num_part, rep) %>% # mutate(sim_id = cur_group_id()) %>% # arrange(num_it, num_part, rep) %>% # select(sim_id, everything()) %>% # ungroup() # # sim_06_res <- sim_06_res %>% mutate_at(1:5, as.character) %>% # mutate(num_part = factor(num_part, levels = all_num_part)) # # save(sim_06_res, file = "optDes_sim_res_06.RData") load("optDes_sim_res_06.RData")
Top design is replicate 3: Here, it looks again like even levelled in-between time-value is good.
p <- sim_06_res %>% ggplot(., aes(x = dose, y = time, size = weights, color = rep, label = log10_value)) + geom_point(alpha = 0.5) + scale_size_continuous(limits = c(0, 0.5)) ggplotly(p)
top_des <- sim_06_res %>% filter(log10_value == max(log10_value)) par(mfrow = c(2, 2)) top_des_equ <- dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 160, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 120, n_grid = 50) dcrit_equ_plot_td2pLL(des = des_to_list(top_des), theta = my_theta_2, return_values = F, plot_phi = 30, plot_theta = 60, n_grid = 50)
Incorporating the structure of having:
two points at lowest time, two at highest, two in between (but at the same time value!) might improve the algorithm!
Also: Deep dive into the particle swarm optimizer to better understand the role of the hyperparameters beta and gamma!
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.